# =============================================================================
# This code writes out a sample that illustrates the IRSFFA data.
#   The code was originally used to read and organize the full IRSFFA data.
#   The raw data is proprietary and will not be provided. 
#   The output of this code is provided.
#   NOTE: THE DATA IS A RANDOMIZED SAMPLE. ANALYSIS WITH THIS SAMPLE IS NOT FEASIBLE AND RESULTS WILL NOT BE INTERPRETABLE.
# =============================================================================

# #Import the main datasets and make copies
# irsffa = pd.read_parquet(r'D:\jodo_emga\Plasma\Data\HFS_COVID\Cleaned\hfsdf_f_08292021.parquet')
# #Create a random sample
# irsffa_cleaned_sample = resample(df=irsffa,n=100000)
# irsffa_cleaned_sample.to_parquet(cdd['p_d_irsffa'] + r'\Sample\irsffa_cleaned_sample.parquet')

# Export the sample to Stata with variable labels attached.
# Both inputs and the output live in the same "Sample" folder under cdd['p_d_irsffa'].
sample_dir = cdd['p_d_irsffa'] + r'\Sample'

# Load the sample itself and the table that pairs each variable with its label.
irsffa_cleaned_sample = pd.read_parquet(sample_dir + r'\irsffa_cleaned_sample.parquet')
irsffa_labels = pd.read_parquet(sample_dir + r'\Labels.parquet')

# Build a {variable: label} mapping from the 'variable' and 'label' columns.
label_by_variable = irsffa_labels.set_index('variable')['label']
labels = label_by_variable.to_dict()

# Keep only the columns that have a label entry (in label-table order), then
# write them as a Stata file (format version 117) carrying the labels.
labelled_columns = irsffa_cleaned_sample[list(labels)]
labelled_columns.to_stata(
    sample_dir + r'\irsffa_cleaned_sample.dta',
    version=117,
    variable_labels=labels,
)




